import requests
import re
import pandas as pd
# Search-result page (single page) for the keyword "Python" on bookschina.com.
URL = 'https://www.bookschina.com/book_find2/?stp=Python&sCate=0'
# Browser-like User-Agent sent with the request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10
# Excel workbook the scraped table is written to.
OUTPUT_PATH = '图书数据(单页).xlsx'

# Output column header -> compiled regex whose single capture group yields the
# value for that column. Insertion order defines the column order of the sheet.
# NOTE: "pulishTiem" is copied verbatim from the original pattern (presumably
# it mirrors the site's own markup) -- do not "correct" the spelling.
FIELD_PATTERNS = {
    '书名': re.compile(r'<h2 class="name"><a href=".*?" target="_blank" title="(.*?)">'),
    '出版时间': re.compile(r'<span class="pulishTiem" title="出版时间">(.*?)&nbsp;&nbsp;/&nbsp;&nbsp;</span>'),
    '出版社': re.compile(r'class="publisher">(.*?)</a>'),
    '定价': re.compile(r'<span class="priceTit">定价:</span><del class="">&yen;(.*?)</del>'),
    '售价': re.compile(r'<span class="sellPrice">&yen;(.*?)</span><span class="discount">'),
}


def fetch_html(url=URL, *, timeout=REQUEST_TIMEOUT):
    """Download *url* and return its body as text.

    Args:
        url: Page to request.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(url=url, headers=HEADERS, timeout=timeout)
    # Fail loudly on an error status instead of scraping an error page.
    response.raise_for_status()
    # Decode using the encoding guessed from the body rather than the one
    # derived from the HTTP headers.
    response.encoding = response.apparent_encoding
    return response.text


def parse_books(code):
    """Extract the book table from the HTML text of a result page.

    Every pattern in ``FIELD_PATTERNS`` is matched against *code*
    independently, and the n-th match of each pattern forms the n-th row.

    Args:
        code: HTML source of the page.

    Returns:
        A ``pandas.DataFrame`` with one column per ``FIELD_PATTERNS`` key
        (all values are strings). Empty, but with all columns present, when
        nothing matches.

    Raises:
        ValueError: If the patterns do not all match the same number of
            times. The rows could not be aligned reliably in that case, so
            the per-field counts are reported instead of building a table.
    """
    columns = {field: pattern.findall(code) for field, pattern in FIELD_PATTERNS.items()}
    counts = {field: len(values) for field, values in columns.items()}
    if len(set(counts.values())) > 1:
        raise ValueError(
            f'Fields matched a different number of times, rows cannot be aligned: {counts}'
        )
    return pd.DataFrame(columns)


def main():
    """Scrape the configured page and save the result to ``OUTPUT_PATH``."""
    data = parse_books(fetch_html())
    data.to_excel(OUTPUT_PATH, index=False)


if __name__ == '__main__':
    main()
